{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 6 Pandas的函数应用"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Numpy ufunc 函数，randn跟的是维数\n",
    "df = pd.DataFrame(np.random.randn(5,4) - 1)\n",
    "print(df)\n",
    "\n",
    "print(np.abs(df)) #绝对值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:47:22.221877Z",
     "start_time": "2025-01-07T06:47:21.921101Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0  0.465819 -0.321208 -1.308760  0.975991\n",
      "1 -1.524750 -0.341895 -0.781658  0.204757\n",
      "2 -0.839421  1.918311 -1.399502 -0.597238\n",
      "3 -0.685636 -0.610916 -1.233845  0.396872\n",
      "4 -0.347737 -2.372889 -1.950433 -1.215931\n",
      "          0         1         2         3\n",
      "0  0.465819  0.321208  1.308760  0.975991\n",
      "1  1.524750  0.341895  0.781658  0.204757\n",
      "2  0.839421  1.918311  1.399502  0.597238\n",
      "3  0.685636  0.610916  1.233845  0.396872\n",
      "4  0.347737  2.372889  1.950433  1.215931\n"
     ]
    }
   ],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "source": [
    "#apply默认作用在列上,x是每一列,因为axis=0\n",
    "print(df.apply(lambda x : x.max()))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:51:02.191143Z",
     "start_time": "2025-01-07T06:51:02.188561Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.465819\n",
      "1    1.918311\n",
      "2   -0.781658\n",
      "3    0.975991\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "#apply作用在行上\n",
    "print(df.apply(lambda x : x.max(), axis=1))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:51:21.708795Z",
     "start_time": "2025-01-07T06:51:21.705102Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.975991\n",
      "1    0.204757\n",
      "2    1.918311\n",
      "3    0.396872\n",
      "4   -0.347737\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "source": [
    "# 使用applymap应用到每个数据\n",
    "print(df.map(lambda x : '%.2f' % x))\n",
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:54:30.317063Z",
     "start_time": "2025-01-07T06:54:30.312759Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2      3\n",
      "0   0.47  -0.32  -1.31   0.98\n",
      "1  -1.52  -0.34  -0.78   0.20\n",
      "2  -0.84   1.92  -1.40  -0.60\n",
      "3  -0.69  -0.61  -1.23   0.40\n",
      "4  -0.35  -2.37  -1.95  -1.22\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    float64\n",
       "1    float64\n",
       "2    float64\n",
       "3    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "str"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type('%.2f' % 1.3456)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:27:27.258699100Z",
     "start_time": "2024-04-11T03:27:27.224712Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": "## 6.4 索引排序（不重要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Series\n",
    "print(np.random.randint(5, size=5))\n",
    "print('-'*50)\n",
    "s4 = pd.Series(range(10, 15), index = np.random.randint(5, size=5)) #索引随机生成\n",
    "print(s4)\n",
    "print('-'*50)\n",
    "# 索引排序,sort_index返回一个新的排好索引的series\n",
    "print(s4.sort_index())\n",
    "print(s4)\n",
    "# s4.loc[0:3]  loc索引值不唯一时直接报错\n",
    "print(s4.iloc[0:3])\n",
    "s4[0:3]  #默认用的位置索引"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:56:44.263840Z",
     "start_time": "2025-01-07T06:56:44.258365Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 2 0 2 1]\n",
      "--------------------------------------------------\n",
      "1    10\n",
      "2    11\n",
      "0    12\n",
      "2    13\n",
      "4    14\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "0    12\n",
      "1    10\n",
      "2    11\n",
      "2    13\n",
      "4    14\n",
      "dtype: int64\n",
      "1    10\n",
      "2    11\n",
      "0    12\n",
      "2    13\n",
      "4    14\n",
      "dtype: int64\n",
      "1    10\n",
      "2    11\n",
      "0    12\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1    10\n",
       "2    11\n",
       "0    12\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# s4.loc[1:2] #loc索引值唯一时可以切片"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame\n",
    "df4 = pd.DataFrame(np.random.randn(5, 5),\n",
    "                   index=np.random.randint(5, size=5),\n",
    "                   columns=np.random.randint(5, size=5))\n",
    "print(df4)\n",
    "#轴零是行索引排序\n",
    "df4_isort = df4.sort_index(axis=0, ascending=False)\n",
    "print(df4_isort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T06:57:49.471650Z",
     "start_time": "2025-01-07T06:57:49.465658Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          2         3         1         3         0\n",
      "2  0.481626  0.267296 -0.951306 -0.167853 -0.215942\n",
      "0 -0.590575 -0.441858  0.380532  1.001047  1.564096\n",
      "3 -1.641073 -1.698706  0.205363 -0.206155 -0.668905\n",
      "3  0.716457  0.461260  0.742063 -1.594768 -0.449151\n",
      "4  1.401914 -0.229332 -1.285855 -0.761922  0.502977\n",
      "          2         3         1         3         0\n",
      "4  1.401914 -0.229332 -1.285855 -0.761922  0.502977\n",
      "3 -1.641073 -1.698706  0.205363 -0.206155 -0.668905\n",
      "3  0.716457  0.461260  0.742063 -1.594768 -0.449151\n",
      "2  0.481626  0.267296 -0.951306 -0.167853 -0.215942\n",
      "0 -0.590575 -0.441858  0.380532  1.001047  1.564096\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "source": [
    "#轴1是列索引排序\n",
    "df4_isort = df4.sort_index(axis=1, ascending=True)\n",
    "print(df4_isort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T06:58:16.127791Z",
     "start_time": "2025-01-07T06:58:16.124266Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3         3\n",
      "2 -0.215942 -0.951306  0.481626  0.267296 -0.167853\n",
      "0  1.564096  0.380532 -0.590575 -0.441858  1.001047\n",
      "3 -0.668905  0.205363 -1.641073 -1.698706 -0.206155\n",
      "3 -0.449151  0.742063  0.716457  0.461260 -1.594768\n",
      "4  0.502977 -1.285855  1.401914 -0.229332 -0.761922\n"
     ]
    }
   ],
   "execution_count": 10
  },
  {
   "cell_type": "markdown",
   "source": "# 6.5 按值排序（机器学习，深度学习不重要，数据分析才需要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 按值排序,by后是column的值\n",
    "import random\n",
    "l=[random.randint(0,100) for i in range(24)] #生成24个随机数\n",
    "df4 = pd.DataFrame(np.array(l).reshape(6,4)) #生成6行4列的dataframe\n",
    "# print(df4) #查看数据,ndarray\n",
    "# print('-'*50)\n",
    "print(df4)\n",
    "print('-'*50)\n",
    "#按轴零排序，by后是列名,交换的是行\n",
    "df4_vsort = df4.sort_values(by=3,axis=0, ascending=False) #寻找的是columns里的3,重要\n",
    "print(df4_vsort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T07:00:28.834245Z",
     "start_time": "2025-01-07T07:00:28.830291Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    0   1   2   3\n",
      "0  43  84  86  31\n",
      "1  48  65  19  89\n",
      "2  44  30   8  73\n",
      "3   6   5  66  49\n",
      "4  40  44  15  90\n",
      "5  10  42  33  20\n",
      "--------------------------------------------------\n",
      "    0   1   2   3\n",
      "4  40  44  15  90\n",
      "1  48  65  19  89\n",
      "2  44  30   8  73\n",
      "3   6   5  66  49\n",
      "0  43  84  86  31\n",
      "5  10  42  33  20\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    2   0   1    3\n",
      "0  58  65  21   61\n",
      "1  35  88  25   86\n",
      "2  63  65  28    5\n",
      "3  95  64  45   23\n",
      "4  57   0  92  100\n",
      "5  89  52  57   24\n"
     ]
    }
   ],
   "source": [
    "#按轴1排序，by后行索引名，交换的是列\n",
    "df4_vsort = df4.sort_values(by=3,axis=1, ascending=False) #寻找的是index里的3\n",
    "print(df4_vsort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:36:15.358985700Z",
     "start_time": "2024-04-11T03:36:15.309976600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": "# 6.6 处理缺失数据（重要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "df_data = pd.DataFrame([np.random.randn(3), [1., 2., np.nan],\n",
    "                       [np.nan, 4., np.nan], [1., 2., 3.]])\n",
    "print(df_data.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T07:27:11.073444Z",
     "start_time": "2025-01-07T07:27:11.068397Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0 -1.264229 -0.514817  1.420473\n",
      "1  1.000000  2.000000       NaN\n",
      "2       NaN  4.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "source": [
    "df_data.iloc[2,0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T07:27:34.604151Z",
     "start_time": "2025-01-07T07:27:34.600928Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "nan"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 15
  },
  {
   "cell_type": "code",
   "source": [
    "#isnull来判断是否有空的数据\n",
    "print(df_data.isnull())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T07:29:47.748249Z",
     "start_time": "2025-01-07T07:29:47.744914Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    1\n",
      "1    0\n",
      "2    2\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T07:30:23.416178Z",
     "start_time": "2025-01-07T07:30:23.412763Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#帮我计算df_data缺失率\n",
    "print(df_data.isnull().sum()/len(df_data))"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.25\n",
      "1    0.00\n",
      "2    0.50\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 删除缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "#默认一个样本，任何一个特征缺失，就删除\n",
    "#inplace True是修改的是原有的df\n",
    "#subset=[0]是指按第一列来删除,第一列有空值就删除对应的行\n",
    "print(df_data.dropna(subset=[0]))\n",
    "# df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T07:28:20.533946Z",
     "start_time": "2025-01-07T07:28:20.529026Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0 -1.264229 -0.514817  1.420473\n",
      "1  1.000000  2.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T07:28:58.374142Z",
     "start_time": "2025-01-07T07:28:58.369104Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0 -1.264229 -0.514817  1.420473\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.264229</td>\n",
       "      <td>-0.514817</td>\n",
       "      <td>1.420473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 18
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1\n",
      "0  0.039062\n",
      "1  2.000000\n",
      "2  4.000000\n",
      "3  2.000000\n"
     ]
    }
   ],
   "source": [
    "#用的不多，用在某个特征缺失太多时，才会进行删除\n",
    "print(df_data.dropna(axis=1))  #某列由nan就删除该列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:39:58.138252400Z",
     "start_time": "2024-04-11T06:39:58.106261Z"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T07:32:04.356548Z",
     "start_time": "2025-01-07T07:32:04.351917Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0 -1.264229 -0.514817  1.420473\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.264229</td>\n",
       "      <td>-0.514817</td>\n",
       "      <td>1.420473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 21
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 填充缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "#均值，中位数，众数填充"
  },
  {
   "cell_type": "code",
   "source": [
    "#给零列的空值填为-100，按特征（按列）去填充\n",
    "print(df_data.iloc[:,0].fillna(-100.))\n",
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T07:33:13.647613Z",
     "start_time": "2025-01-07T07:33:13.642268Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     -1.264229\n",
      "1      1.000000\n",
      "2   -100.000000\n",
      "3      1.000000\n",
      "Name: 0, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0 -1.264229 -0.514817  1.420473\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.264229</td>\n",
       "      <td>-0.514817</td>\n",
       "      <td>1.420473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "source": [
    "#依次拿到每一列\n",
    "for i in df_data.columns:\n",
    "    print(df_data.loc[:,i])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T07:34:06.519750Z",
     "start_time": "2025-01-07T07:34:06.516346Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -1.264229\n",
      "1    1.000000\n",
      "2         NaN\n",
      "3    1.000000\n",
      "Name: 0, dtype: float64\n",
      "0   -0.514817\n",
      "1    2.000000\n",
      "2    4.000000\n",
      "3    2.000000\n",
      "Name: 1, dtype: float64\n",
      "0    1.420473\n",
      "1         NaN\n",
      "2         NaN\n",
      "3    3.000000\n",
      "Name: 2, dtype: float64\n"
     ]
    }
   ],
   "execution_count": 23
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T07:35:30.919530Z",
     "start_time": "2025-01-07T07:35:30.916262Z"
    }
   },
   "cell_type": "code",
   "source": "df_data.iloc[:,0].fillna(-100.,inplace=True) #inplace=True后面会被删除",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\41507\\AppData\\Local\\Temp\\ipykernel_10424\\3395572301.py:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
      "\n",
      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
      "\n",
      "\n",
      "  df_data.iloc[:,0].fillna(-100.,inplace=True)\n"
     ]
    }
   ],
   "execution_count": 26
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T07:37:49.794275Z",
     "start_time": "2025-01-07T07:37:49.791340Z"
    }
   },
   "cell_type": "code",
   "source": "df_data.iloc[:,2]=df_data.iloc[:,2].fillna(df_data.iloc[:,2].mean()) #用均值填充空值",
   "outputs": [],
   "execution_count": 27
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T07:37:55.580904Z",
     "start_time": "2025-01-07T07:37:55.575901Z"
    }
   },
   "cell_type": "code",
   "source": "df_data",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "            0         1         2\n",
       "0   -1.264229 -0.514817  1.420473\n",
       "1    1.000000  2.000000  2.210236\n",
       "2 -100.000000  4.000000  2.210236\n",
       "3    1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.264229</td>\n",
       "      <td>-0.514817</td>\n",
       "      <td>1.420473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.210236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-100.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.210236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 28
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
